### Load required libraries
library(readxl)
library(tidyverse)
library(purrr)
library(dplyr)
library(knitr)
library(DT)
library(httr)
library(jsonlite)
library(stringr)
library(GSEABase)
library(org.Hs.eg.db)
### Load functions
source(file = "../02_functions.R")
### Define paths
current_dir <- getwd()
data_path <- file.path(current_dir,
"../../data/_raw")
results_pc_path <- file.path(current_dir,
"../../results/patient_characteristics/output")
results_fip_path <- file.path(current_dir,
"../../results/somatic_mutation_analysis/functional_impact_prediction/output")
results_ubgsea_path <- file.path(current_dir,
"../../results/somatic_mutation_analysis/ubiquitin_gsea/output")
figures_ubgsea_path <- file.path(current_dir,
"../../results/somatic_mutation_analysis/ubiquitin_gsea/figures")
maf_df_path <- file.path(results_fip_path, "all_mutations.csv") # All mutations df
nonsyn_df_path <- file.path(results_fip_path, "nonsyn_mutations.csv") # Nonsyn mutations df
lof_df_path <- file.path(results_fip_path, "lof_mutations.csv") # LoF mutations df
# Sup1 data
sup1_response_path <- file.path(results_pc_path,
"sup1_response.csv")
### Read data & wrangle
# Read 3 lists of mutations
# All
maf_df_annotated <- read.csv(maf_df_path, header = TRUE)[ , -1]
list_of_all_mutations <- split(maf_df_annotated, maf_df_annotated$sample_id
)
# Nonsyn
nonsyn_df_annotated <- read.csv(nonsyn_df_path, header = TRUE)[ , -1]
list_of_nonsyn_mutations <- split(nonsyn_df_annotated, nonsyn_df_annotated$sample_id)
# LoF
lof_df_annotated <- read.csv(lof_df_path, header = TRUE)[ , -1]
list_of_lof_mutations <- split(lof_df_annotated, lof_df_annotated$sample_id)
# Get the list of patient IDs
sample_id_list <- unique(maf_df_annotated$sample_id)
## Read sup1_response.csv
sup1_response_df <- read.csv(sup1_response_path)[ , -1] # Drop first column
# df to merge response info later
response_df <- sup1_response_df %>%
dplyr::rename(sample_id = Sample.ID,
patient_response = Patient.Response) %>%
dplyr::select(sample_id, patient_response)
# Create a list of Responders and Non-responders
sample_id_lists <- split(sup1_response_df$Sample.ID,
sup1_response_df$Patient.Response) # Split the "Sample.ID" values into lists based on "Patient.Response"
r_sample_ids <- sample_id_lists$R
nr_sample_ids <- sample_id_lists$NR
# Define output path
mm909_sum_plot_path <- file.path(figures_ubgsea_path,
"mm909_sum_plot.png")
# Summary plot
mm909_sum_plot <- sum_plot(all_df_ubiquitin, nonsyn_df_ubiquitin, lof_df_ubiquitin, mm909_sum_plot_path)
# Display
print(mm909_sum_plot)
Sample MM909_47 does not have any Ubiquitin-related gene.
Consists of gene sets derived from the Gene Ontology (GO) annotations. Here, we focus on Gene sets derived from the GO Biological Process (BP) ontology.
# Define paths
fora_results_all_csv_path <- file.path(results_ubgsea_path, "fora_results_c5bp_all.csv")
fora_results_nonsyn_csv_path <- file.path(results_ubgsea_path, "fora_results_c5bp_nonsyn.csv")
fora_results_lof_csv_path <- file.path(results_ubgsea_path, "fora_results_c5bp_lof.csv")
# # Perform FORA analysis for C5:BP Gene Sets
# results_list <- perform_fora_analysis(list_of_all_ub_mutations, list_of_nonsyn_ub_mutations, list_of_lof_ub_mutations, fora_results_all_csv_path, fora_results_nonsyn_csv_path, fora_results_lof_csv_path, category="C5", subcategory="BP")
# Load fora results
fora_results_c5bp_all_df <- read.csv(fora_results_all_csv_path, header = TRUE)[ , -1]
fora_results_c5bp_nonsyn_df <- read.csv(fora_results_nonsyn_csv_path, header = TRUE)[ , -1]
fora_results_c5bp_lof_df <- read.csv(fora_results_lof_csv_path, header = TRUE)[ , -1]
# Define output path
mm909_c5bp_all_plot_path <- file.path(figures_ubgsea_path,
"mm909_c5bp_all_plot.png")
mm909_c5bp_all_plot <- plot_top5(response_df, fora_results_c5bp_all_df, mm909_c5bp_all_plot_path, "FORA with top C5 BP across samples (All mutations)", "Top C5 BP Gene Sets")
print(mm909_c5bp_all_plot)
# Define output path
mm909_c5bp_nonsyn_plot_path <- file.path(figures_ubgsea_path,
"mm909_c5bp_nonsyn_plot.png")
mm909_c5bp_nonsyn_plot <- plot_top5(response_df, fora_results_c5bp_nonsyn_df, mm909_c5bp_nonsyn_plot_path, "FORA with top C5 BP across samples (Nonsyn mutations)", "Top C5 BP Gene Sets")
print(mm909_c5bp_nonsyn_plot)
# Define output path
mm909_c5bp_lof_plot_path <- file.path(figures_ubgsea_path,
"mm909_c5bp_lof_plot.png")
mm909_c5bp_lof_plot <- plot_top5(response_df, fora_results_c5bp_lof_df, mm909_c5bp_lof_plot_path, "FORA with top C5 BP across samples (LoF mutations)", "Top C5 BP Gene Sets")
print(mm909_c5bp_lof_plot)
C5 consists of gene sets derived from the Gene Ontology (GO) annotations. Here, we focus on Gene sets derived from the GO Cellular Component (CC) ontology.
# Define paths
fora_results_all_csv_path <- file.path(results_ubgsea_path, "fora_results_c5cc_all.csv")
fora_results_nonsyn_csv_path <- file.path(results_ubgsea_path, "fora_results_c5cc_nonsyn.csv")
fora_results_lof_csv_path <- file.path(results_ubgsea_path, "fora_results_c5cc_lof.csv")
# # Perform FORA analysis for H Gene Sets
# results_list <- perform_fora_analysis(list_of_all_ub_mutations, list_of_nonsyn_ub_mutations, list_of_lof_ub_mutations, fora_results_all_csv_path, fora_results_nonsyn_csv_path, fora_results_lof_csv_path, category="C5", subcategory="CC")
# Load fora results
fora_results_c5cc_all_df <- read.csv(fora_results_all_csv_path, header = TRUE)[ , -1]
fora_results_c5cc_nonsyn_df <- read.csv(fora_results_nonsyn_csv_path, header = TRUE)[ , -1]
fora_results_c5cc_lof_df <- read.csv(fora_results_lof_csv_path, header = TRUE)[ , -1]
# Define output path
mm909_c5cc_proteasome_lof_plot_path <- file.path(figures_ubgsea_path,
"mm909_c5cc_proteasome_lof_plot.png")
proteasome_genesets = c("GOCC_PROTEASOME_ACCESSORY_COMPLEX", "GOCC_PROTEASOME_COMPLEX", "GOCC_PROTEASOME_CORE_COMPLEX", "GOCC_PROTEASOME_CORE_COMPLEX_ALPHA_SUBUNIT_COMPLEX", "GOCC_PROTEASOME_CORE_COMPLEX_BETA_SUBUNIT_COMPLEX", "GOCC_PROTEASOME_REGULATORY_PARTICLE", "GOCC_PROTEASOME_REGULATORY_PARTICLE_BASE_SUBCOMPLEX", "GOCC_PROTEASOME_REGULATORY_PARTICLE_LID_SUBCOMPLEX")
mm909_c5cc_proteasome_lof_plot <- plot_geneset(proteasome_genesets, response_df, fora_results_c5cc_lof_df, mm909_c5cc_proteasome_lof_plot_path, "FORA with PROTEASOME-related C5 CC Gene Sets across samples (LoF mutations)", "PROTEASOME Gene Sets")
print(mm909_c5cc_proteasome_lof_plot)
Hallmark gene sets summarize and represent specific well-defined biological states or processes and display coherent expression.
# Define paths
fora_results_all_csv_path <- file.path(results_ubgsea_path, "fora_results_h_all.csv")
fora_results_nonsyn_csv_path <- file.path(results_ubgsea_path, "fora_results_h_nonsyn.csv")
fora_results_lof_csv_path <- file.path(results_ubgsea_path, "fora_results_h_lof.csv")
# # Perform FORA analysis for H Gene Sets
# results_list <- perform_fora_analysis(list_of_all_ub_mutations, list_of_nonsyn_ub_mutations, list_of_lof_ub_mutations, fora_results_all_csv_path, fora_results_nonsyn_csv_path, fora_results_lof_csv_path, category="H")
# Load fora results
fora_results_h_all_df <- read.csv(fora_results_all_csv_path, header = TRUE)[ , -1]
fora_results_h_nonsyn_df <- read.csv(fora_results_nonsyn_csv_path, header = TRUE)[ , -1]
fora_results_h_lof_df <- read.csv(fora_results_lof_csv_path, header = TRUE)[ , -1]
Not enough data
# Define output path
mm909_h_nonsyn_plot_path <- file.path(figures_ubgsea_path,
"mm909_h_nonsyn_plot.png")
mm909_h_nonsyn_plot <- plot_top5(response_df, fora_results_h_nonsyn_df, mm909_h_nonsyn_plot_path, "FORA with top H across samples (Nonsyn mutations)", "Top H Gene Sets")
print(mm909_h_nonsyn_plot)
# Define output path
mm909_h_lof_plot_path <- file.path(figures_ubgsea_path,
"mm909_h_lof_plot.png")
mm909_h_lof_plot <- plot_top5(response_df, fora_results_h_lof_df, mm909_h_lof_plot_path, "FORA with top H across samples (LoF mutations)", "Top H Gene Sets")
print(mm909_h_lof_plot)
Filter specifically for HALLMARK_APOPTOSIS gene set.
# Define output path
mm909_h_apoptosis_plot_path <- file.path(figures_ubgsea_path,
"mm909_h_apoptosis_plot.png")
mm909_h_apoptosis_plot <- plot_geneset(c("HALLMARK_APOPTOSIS"), response_df, fora_results_h_lof_df, mm909_h_apoptosis_plot_path, "FORA with HALLMARK_APOPTOSIS across samples (LoF mutations)", "HALLMARK_APOPTOSIS Gene Set")
print(mm909_h_apoptosis_plot)
Gene sets that represent signatures of cellular pathways which are often dis-regulated in cancer. The majority of signatures were generated directly from microarray data from NCBI GEO or from internal unpublished profiling experiments involving perturbation of known cancer genes.
# Define paths
fora_results_all_csv_path <- file.path(results_ubgsea_path, "fora_results_c6_all.csv")
fora_results_nonsyn_csv_path <- file.path(results_ubgsea_path, "fora_results_c6_nonsyn.csv")
fora_results_lof_csv_path <- file.path(results_ubgsea_path, "fora_results_c6_lof.csv")
# # Perform FORA analysis for C6 Gene Sets
# results_list <- perform_fora_analysis(list_of_all_ub_mutations, list_of_nonsyn_ub_mutations, list_of_lof_ub_mutations, fora_results_all_csv_path, fora_results_nonsyn_csv_path, fora_results_lof_csv_path, category="C6")
# Load fora results
fora_results_c6_all_df <- read.csv(fora_results_all_csv_path, header = TRUE)[ , -1]
fora_results_c6_nonsyn_df <- read.csv(fora_results_nonsyn_csv_path, header = TRUE)[ , -1]
fora_results_c6_lof_df <- read.csv(fora_results_lof_csv_path, header = TRUE)[ , -1]
Not enough data
Not enough data
Not enough data
Gene sets that represent cell states and perturbations within the immune system.
# Define paths
fora_results_all_csv_path <- file.path(results_ubgsea_path, "fora_results_c7_all.csv")
fora_results_nonsyn_csv_path <- file.path(results_ubgsea_path, "fora_results_c7_nonsyn.csv")
fora_results_lof_csv_path <- file.path(results_ubgsea_path, "fora_results_c7_lof.csv")
# # Perform FORA analysis for C7 Gene Sets
# results_list <- perform_fora_analysis(list_of_all_ub_mutations, list_of_nonsyn_ub_mutations, list_of_lof_ub_mutations, fora_results_all_csv_path, fora_results_nonsyn_csv_path, fora_results_lof_csv_path, category="C7")
# Load fora results
fora_results_c7_all_df <- read.csv(fora_results_all_csv_path, header = TRUE)[ , -1]
fora_results_c7_nonsyn_df <- read.csv(fora_results_nonsyn_csv_path, header = TRUE)[ , -1]
fora_results_c7_lof_df <- read.csv(fora_results_lof_csv_path, header = TRUE)[ , -1]
# Define output path
mm909_c7_all_plot_path <- file.path(figures_ubgsea_path,
"mm909_c7_all_plot.png")
mm909_c7_all_plot <- plot_top5(response_df, fora_results_c7_all_df, mm909_c7_all_plot_path, "FORA with top C7 across samples (All mutations)", "Top C7 Gene Sets")
print(mm909_c7_all_plot)
# Define output path
mm909_c7_nonsyn_plot_path <- file.path(figures_ubgsea_path,
"mm909_c7_nonsyn_plot.png")
mm909_c7_nonsyn_plot <- plot_top5(response_df, fora_results_c7_nonsyn_df, mm909_c7_nonsyn_plot_path, "FORA with top C7 across samples (Nonsyn mutations)", "Top C7 Gene Sets")
print(mm909_c7_nonsyn_plot)
# Define output path
mm909_c7_lof_plot_path <- file.path(figures_ubgsea_path,
"mm909_c7_lof_plot.png")
mm909_c7_lof_plot <- plot_top5(response_df, fora_results_c7_lof_df, mm909_c7_lof_plot_path, "FORA with top C7 across samples (LoF mutations)", "Top C7 Gene Sets")
print(mm909_c7_lof_plot)
I searched for the term “ubiquitin” in MSigDB and downloaded a GMT file with all the hits (Gene Sets). Number of GS: 101.
# Define paths
fora_results_all_csv_path <- file.path(results_ubgsea_path, "fora_results_ubiquitin_all.csv")
fora_results_nonsyn_csv_path <- file.path(results_ubgsea_path, "fora_results_ubiquitin_nonsyn.csv")
fora_results_lof_csv_path <- file.path(results_ubgsea_path, "fora_results_ubiquitin_lof.csv")
# # Load GMT file as a GeneSetCollection object
# gene_set_collection <- getGmt(file.path(data_path, "genesets_ubiquitin.v2023.2.Hs.gmt"))
#
# # Initialize an empty list to store the results
# gene_sets_list <- list()
#
# # Iterate over each GeneSet in the GeneSetCollection
# for (setName in names(gene_set_collection)) {
# # Extract the GeneSet by its name
# geneSet <- gene_set_collection[[setName]]
#
# # Convert Hugo symbols to Ensembl IDs for this GeneSet
# ensembl_ids <- SYMBOLtoENSEMBL(geneIds(geneSet))
#
# # Ensure the result is a character vector of Ensembl Gene IDs
# ensembl_ids_vector <- as.character(ensembl_ids)
#
# # Store the converted IDs in the list, associated with the setName
# gene_sets_list[[setName]] <- ensembl_ids_vector
# }
#
# # Set names for each element in the list to match the setName
# names(gene_sets_list) <- names(gene_set_collection)
#
# # Perform FORA analysis for Ubiquitin Gene Sets
# results_list <- perform_fora_analysis(list_of_all_ub_mutations, list_of_nonsyn_ub_mutations, list_of_lof_ub_mutations, fora_results_all_csv_path, fora_results_nonsyn_csv_path, fora_results_lof_csv_path, gene_sets_list=gene_sets_list)
# Load fora results
fora_results_ubiquitin_all_df <- read.csv(fora_results_all_csv_path, header = TRUE)[ , -1]
fora_results_ubiquitin_nonsyn_df <- read.csv(fora_results_nonsyn_csv_path, header = TRUE)[ , -1]
fora_results_ubiquitin_lof_df <- read.csv(fora_results_lof_csv_path, header = TRUE)[ , -1]
# Define output path
mm909_ubiquitin_all_plot_path <- file.path(figures_ubgsea_path,
"mm909_ubiquitin_all_plot.png")
mm909_ubiquitin_all_plot <- plot_top5(response_df, fora_results_ubiquitin_all_df, mm909_ubiquitin_all_plot_path, "FORA with top Ubiquitin GS across samples (All mutations)", "Top Ubiquitin Gene Sets")
print(mm909_ubiquitin_all_plot)
# Define output path
mm909_ubiquitin_nonsyn_plot_path <- file.path(figures_ubgsea_path,
"mm909_ubiquitin_nonsyn_plot.png")
mm909_ubiquitin_nonsyn_plot <- plot_top5(response_df, fora_results_ubiquitin_nonsyn_df, mm909_ubiquitin_nonsyn_plot_path, "FORA with top Ubiquitin GS across samples (Nonsyn mutations)", "Top Ubiquitin Gene Sets")
print(mm909_ubiquitin_nonsyn_plot)
# Define output path
mm909_ubiquitin_lof_plot_path <- file.path(figures_ubgsea_path,
"mm909_ubiquitin_lof_plot.png")
mm909_ubiquitin_lof_plot <- plot_top5(response_df, fora_results_ubiquitin_lof_df, mm909_ubiquitin_lof_plot_path, "FORA with top Ubiquitin GS across samples (LoF mutations)", "Top Ubiquitin Gene Sets")
print(mm909_ubiquitin_lof_plot)
Gene sets in this collection are curated from various sources, including online pathway databases and the biomedical literature. Many sets are also contributed by individual domain experts. Here, we focus on Chemical and genetic perturbations (CGP).
# Define paths
fora_results_all_csv_path <- file.path(results_ubgsea_path, "fora_results_c2cgp_all.csv")
fora_results_nonsyn_csv_path <- file.path(results_ubgsea_path, "fora_results_c2cgp_nonsyn.csv")
fora_results_lof_csv_path <- file.path(results_ubgsea_path, "fora_results_c2cgp_lof.csv")
# # Perform FORA analysis for C2_CGP Gene Sets
# results_list <- perform_fora_analysis(list_of_all_ub_mutations, list_of_nonsyn_ub_mutations, list_of_lof_ub_mutations, fora_results_all_csv_path, fora_results_nonsyn_csv_path, fora_results_lof_csv_path, category="C2", subcategory="CGP")
# Load fora results
fora_results_c2cgp_all_df <- read.csv(fora_results_all_csv_path, header = TRUE)[ , -1]
fora_results_c2cgp_nonsyn_df <- read.csv(fora_results_nonsyn_csv_path, header = TRUE)[ , -1]
fora_results_c2cgp_lof_df <- read.csv(fora_results_lof_csv_path, header = TRUE)[ , -1]
# Define output path
mm909_c2cgp_all_plot_path <- file.path(figures_ubgsea_path,
"mm909_c2cgp_all_plot.png")
mm909_c2cgp_all_plot <- plot_top5(response_df, fora_results_c2cgp_all_df, mm909_c2cgp_all_plot_path, "FORA with top C2:CGP across samples (All mutations)", "Top C2:CGP Gene Sets")
print(mm909_c2cgp_all_plot)
# Define output path
mm909_c2cgp_nonsyn_plot_path <- file.path(figures_ubgsea_path,
"mm909_c2cgp_nonsyn_plot.png")
mm909_c2cgp_nonsyn_plot <- plot_top5(response_df, fora_results_c2cgp_nonsyn_df, mm909_c2cgp_nonsyn_plot_path, "FORA with top C2:CGP across samples (Nonsyn mutations)", "Top C2:CGP Gene Sets")
print(mm909_c2cgp_nonsyn_plot)
# Define output path
mm909_c2cgp_lof_plot_path <- file.path(figures_ubgsea_path,
"mm909_c2cgp_lof_plot.png")
mm909_c2cgp_lof_plot <- plot_top5(response_df, fora_results_c2cgp_lof_df, mm909_c2cgp_lof_plot_path, "FORA with top C2:CGP across samples (LoF mutations)", "Top C2:CGP Gene Sets")
print(mm909_c2cgp_lof_plot)
Gene sets in this collection are curated from various sources, including online pathway databases and the biomedical literature. Many sets are also contributed by individual domain experts. Here, we focus on Canonical pathways (CP), specifically Canonical Pathways gene sets derived from the KEGG MEDICUS pathway database.
# Define paths
fora_results_all_csv_path <- file.path(results_ubgsea_path, "fora_results_keggmed_all.csv")
fora_results_nonsyn_csv_path <- file.path(results_ubgsea_path, "fora_results_keggmed_nonsyn.csv")
fora_results_lof_csv_path <- file.path(results_ubgsea_path, "fora_results_keggmed_lof.csv")
# # Load GMT file as a GeneSetCollection object
# gene_set_collection <- getGmt(file.path(data_path, "c2.cp.kegg_medicus.v2023.2.Hs.entrez.gmt"))
#
# # Initialize an empty list to store the results
# gene_sets_list <- list()
#
# # Iterate over each GeneSet in the GeneSetCollection
# for (setName in names(gene_set_collection)) {
# # Extract the GeneSet by its name
# geneSet <- gene_set_collection[[setName]]
#
# # Convert Entrez IDs to Ensembl IDs for this GeneSet
# ensembl_ids <- ENTREZtoENSEMBL(geneIds(geneSet))
#
# # Ensure the result is a character vector of Ensembl Gene IDs
# ensembl_ids_vector <- as.character(ensembl_ids)
#
# # Store the converted IDs in the list, associated with the setName
# gene_sets_list[[setName]] <- ensembl_ids_vector
# }
#
# # Set names for each element in the list to match the setName
# names(gene_sets_list) <- names(gene_set_collection)
#
# # Perform FORA analysis for KEGG_MEDICUS Gene Sets
# results_list <- perform_fora_analysis(list_of_all_ub_mutations, list_of_nonsyn_ub_mutations, list_of_lof_ub_mutations, fora_results_all_csv_path, fora_results_nonsyn_csv_path, fora_results_lof_csv_path, gene_sets_list=gene_sets_list)
# Load fora results
fora_results_keggmed_all_df <- read.csv(fora_results_all_csv_path, header = TRUE)[ , -1]
fora_results_keggmed_nonsyn_df <- read.csv(fora_results_nonsyn_csv_path, header = TRUE)[ , -1]
fora_results_keggmed_lof_df <- read.csv(fora_results_lof_csv_path, header = TRUE)[ , -1]
# Define output path
mm909_keggmed_all_plot_path <- file.path(figures_ubgsea_path,
"mm909_keggmed_all_plot.png")
mm909_keggmed_all_plot <- plot_top5(response_df, fora_results_keggmed_all_df, mm909_keggmed_all_plot_path, "FORA with top C2:CP:KEGG_MEDICUS across samples (All mutations)", "Top C2:CP:KEGG_MEDICUS Gene Sets")
print(mm909_keggmed_all_plot)
Not enough data
# Define output path
mm909_keggmed_nonsyn_plot_path <- file.path(figures_ubgsea_path,
"mm909_keggmed_nonsyn_plot.png")
mm909_keggmed_nonsyn_plot <- plot_top5(response_df, fora_results_keggmed_nonsyn_df, mm909_keggmed_nonsyn_plot_path, "FORA with top C2:CP:KEGG_MEDICUS across samples (Nonsyn mutations)", "Top C2:CP:KEGG_MEDICUS Gene Sets")
print(mm909_keggmed_nonsyn_plot)
Not enough data
Gene sets in this collection are curated from various sources, including online pathway databases and the biomedical literature. Many sets are also contributed by individual domain experts. Here, we focus on Canonical pathways (CP), specifically Canonical Pathways gene sets derived from the Reactome pathway database.
# Define paths
fora_results_all_csv_path <- file.path(results_ubgsea_path, "fora_results_reactome_all.csv")
fora_results_nonsyn_csv_path <- file.path(results_ubgsea_path, "fora_results_reactome_nonsyn.csv")
fora_results_lof_csv_path <- file.path(results_ubgsea_path, "fora_results_reactome_lof.csv")
# # Perform FORA analysis for C2_CGP Gene Sets
# results_list <- perform_fora_analysis(list_of_all_ub_mutations, list_of_nonsyn_ub_mutations, list_of_lof_ub_mutations, fora_results_all_csv_path, fora_results_nonsyn_csv_path, fora_results_lof_csv_path, category="C2", subcategory="CP:REACTOME")
# Load fora results
fora_results_reactome_all_df <- read.csv(fora_results_all_csv_path, header = TRUE)[ , -1]
fora_results_reactome_nonsyn_df <- read.csv(fora_results_nonsyn_csv_path, header = TRUE)[ , -1]
fora_results_reactome_lof_df <- read.csv(fora_results_lof_csv_path, header = TRUE)[ , -1]
# Define output path
mm909_reactome_all_plot_path <- file.path(figures_ubgsea_path,
"mm909_reactome_all_plot.png")
mm909_reactome_all_plot <- plot_top5(response_df, fora_results_reactome_all_df, mm909_reactome_all_plot_path, "FORA with top C2:CP:REACTOME across samples (All mutations)", "Top C2:CP:REACTOME Gene Sets")
print(mm909_reactome_all_plot)
# Define output path
mm909_reactome_nonsyn_plot_path <- file.path(figures_ubgsea_path,
"mm909_reactome_nonsyn_plot.png")
mm909_reactome_nonsyn_plot <- plot_top5(response_df, fora_results_reactome_nonsyn_df, mm909_reactome_nonsyn_plot_path, "FORA with top C2:CP:REACTOME across samples (Nonsyn mutations)", "Top C2:CP:REACTOME Gene Sets")
print(mm909_reactome_nonsyn_plot)
# Define output path
mm909_reactome_lof_plot_path <- file.path(figures_ubgsea_path,
"mm909_reactome_lof_plot.png")
mm909_reactome_lof_plot <- plot_top5(response_df, fora_results_reactome_lof_df, mm909_reactome_lof_plot_path, "FORA with top C2:CP:REACTOME across samples (LoF mutations)", "Top C2:CP:REACTOME Gene Sets")
print(mm909_reactome_lof_plot)
I searched for the term “proteasome” in MSigDB and downloaded a GMT file with all the hits (Gene Sets). Number of GS: 56.
# Define paths
fora_results_all_csv_path <- file.path(results_ubgsea_path, "fora_results_proteasome_all.csv")
fora_results_nonsyn_csv_path <- file.path(results_ubgsea_path, "fora_results_proteasome_nonsyn.csv")
fora_results_lof_csv_path <- file.path(results_ubgsea_path, "fora_results_proteasome_lof.csv")
# # Load GMT file as a GeneSetCollection object
# gene_set_collection <- getGmt(file.path(data_path, "genesets_proteasome.v2023.2.Hs.gmt"))
#
# # Initialize an empty list to store the results
# gene_sets_list <- list()
#
# # Iterate over each GeneSet in the GeneSetCollection
# for (setName in names(gene_set_collection)) {
# # Extract the GeneSet by its name
# geneSet <- gene_set_collection[[setName]]
#
# # Convert Hugo Symbols to Ensembl IDs for this GeneSet
# ensembl_ids <- SYMBOLtoENSEMBL(geneIds(geneSet))
#
# # Ensure the result is a character vector of Ensembl Gene IDs
# ensembl_ids_vector <- as.character(ensembl_ids)
#
# # Store the converted IDs in the list, associated with the setName
# gene_sets_list[[setName]] <- ensembl_ids_vector
# }
#
# # Perform FORA analysis for PROTEASOME Gene Sets
# results_list <- perform_fora_analysis(list_of_all_mutations, list_of_nonsyn_mutations, list_of_lof_mutations, fora_results_all_csv_path, fora_results_nonsyn_csv_path, fora_results_lof_csv_path, gene_sets_list=gene_sets_list)
# Load fora results
fora_results_proteasome_all_df <- read.csv(fora_results_all_csv_path, header = TRUE)[ , -1]
fora_results_proteasome_nonsyn_df <- read.csv(fora_results_nonsyn_csv_path, header = TRUE)[ , -1]
fora_results_proteasome_lof_df <- read.csv(fora_results_lof_csv_path, header = TRUE)[ , -1]
# Define output path
mm909_proteasome_all_plot_path <- file.path(figures_ubgsea_path,
"mm909_proteasome_all_plot.png")
mm909_proteasome_all_plot <- plot_top5(response_df, fora_results_proteasome_all_df, mm909_proteasome_all_plot_path, "FORA with top PROTEASOME GS across samples (All mutations)", "Top PROTEASOME Gene Sets")
print(mm909_proteasome_all_plot)
# Define output path
mm909_proteasome_nonsyn_plot_path <- file.path(figures_ubgsea_path,
"mm909_proteasome_nonsyn_plot.png")
mm909_proteasome_nonsyn_plot <- plot_top5(response_df, fora_results_proteasome_nonsyn_df, mm909_proteasome_nonsyn_plot_path, "FORA with top PROTEASOME GS across samples (Nonsyn mutations)", "Top PROTEASOME Gene Sets")
print(mm909_proteasome_nonsyn_plot)
# Define output path
mm909_proteasome_lof_plot_path <- file.path(figures_ubgsea_path,
"mm909_proteasome_lof_plot.png")
mm909_proteasome_lof_plot <- plot_top5(response_df, fora_results_proteasome_lof_df, mm909_proteasome_lof_plot_path, "FORA with top PROTEASOME GS across samples (LoF mutations)", "Top PROTEASOME Gene Sets")
print(mm909_proteasome_lof_plot)
Filter specifically for KEGG_PROTEASOME gene set.
# Define output path
mm909_keggproteasome_lof_plot_path <- file.path(figures_ubgsea_path,
"mm909_proteasome_lof_plot.png")
geneset = c("KEGG_PROTEASOME")
mm909_keggproteasome_lof_plot <- plot_geneset(geneset, response_df, fora_results_proteasome_lof_df, mm909_keggproteasome_lof_plot_path, "FORA with KEGG_PROTEASOME GS across samples (LoF mutations)", "KEGG_PROTEASOME Gene Set")
print(mm909_keggproteasome_lof_plot)
sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: x86_64-apple-darwin20 (64-bit)
## Running under: macOS Monterey 12.7.2
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Europe/Copenhagen
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] org.Hs.eg.db_3.18.0 GSEABase_1.64.0 graph_1.80.0
## [4] annotate_1.80.0 XML_3.99-0.16.1 AnnotationDbi_1.64.1
## [7] IRanges_2.36.0 S4Vectors_0.40.2 Biobase_2.62.0
## [10] BiocGenerics_0.48.1 jsonlite_1.8.8 httr_1.4.7
## [13] DT_0.31 knitr_1.45 lubridate_1.9.3
## [16] forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
## [19] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1
## [22] tibble_3.2.1 ggplot2_3.4.4 tidyverse_2.0.0
## [25] readxl_1.4.3
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.0 farver_2.1.1 blob_1.2.4
## [4] Biostrings_2.70.2 bitops_1.0-7 fastmap_1.1.1
## [7] RCurl_1.98-1.14 digest_0.6.34 timechange_0.3.0
## [10] lifecycle_1.0.4 KEGGREST_1.42.0 RSQLite_2.3.5
## [13] magrittr_2.0.3 compiler_4.3.2 rlang_1.1.3
## [16] sass_0.4.8 tools_4.3.2 utf8_1.2.4
## [19] yaml_2.3.8 labeling_0.4.3 htmlwidgets_1.6.4
## [22] bit_4.0.5 withr_3.0.0 grid_4.3.2
## [25] fansi_1.0.6 xtable_1.8-4 colorspace_2.1-0
## [28] scales_1.3.0 cli_3.6.2 rmarkdown_2.25
## [31] crayon_1.5.2 ragg_1.2.7 generics_0.1.3
## [34] rstudioapi_0.15.0 tzdb_0.4.0 DBI_1.2.1
## [37] cachem_1.0.8 zlibbioc_1.48.0 cellranger_1.1.0
## [40] XVector_0.42.0 vctrs_0.6.5 hms_1.1.3
## [43] bit64_4.0.5 systemfonts_1.0.5 jquerylib_0.1.4
## [46] glue_1.7.0 stringi_1.8.3 gtable_0.3.4
## [49] GenomeInfoDb_1.38.6 munsell_0.5.0 pillar_1.9.0
## [52] htmltools_0.5.7 GenomeInfoDbData_1.2.11 R6_2.5.1
## [55] textshaping_0.3.7 evaluate_0.23 highr_0.10
## [58] png_0.1-8 memoise_2.0.1 bslib_0.6.1
## [61] xfun_0.42 pkgconfig_2.0.3